# general visualisation
library('ggplot2') # visualisation
library('scales') # visualisation
library('patchwork') # visualisation
library('RColorBrewer') # visualisation
library('corrplot') # visualisation
# general data manipulation
library('dplyr') # data manipulation
library('readr') # input/output
library('vroom') # input/output
library('skimr') # overview
library('tibble') # data wrangling
library('tidyr') # data wrangling
library('purrr') # data wrangling
library('stringr') # string manipulation
library('forcats') # factor manipulation
# specific visualisation
library('alluvial') # visualisation
library('ggrepel') # visualisation
library('ggforce') # visualisation
library('ggridges') # visualisation
library('gganimate') # animations
library('GGally') # visualisation
library('ggthemes') # visualisation
library('wesanderson') # visualisation
library('kableExtra') # display
# Date + forecast
library('lubridate') # date and time
library('forecast') # time series analysis
library('prophet') # time series analysis
library('timetk') # time series analysis
# Interactivity
library('crosstalk')
library('plotly')
# parallel
library('foreach')
library('doParallel')
get_binCI <- function(x,n) as.list(setNames(binom.test(x,n)$conf.int, c("lwr", "upr")))
library(mgcv)
clean_pages <- vroom(str_c('clean_pages.csv'), delim = ",", col_types = cols())
session_info <- vroom(str_c('sess.csv'), delim = ",", col_types = cols())
signals <- vroom(str_c('signals.csv'), delim = ",", col_types = cols())
clean_signals <- vroom(str_c('csignals.csv'), delim = ",", col_types = cols())
signal_session <- signals %>%
group_by(userId, session) %>%
summarise_at(vars(signal), funs(mean(., na.rm=TRUE)))
set.seed(4321)
clean_pages <- clean_pages %>%
select(userId, time)
cols <- clean_pages %>%
distinct(userId) %>%
mutate(cols = rep_len(brewer.pal(7, "Set2"), length.out = n_distinct(clean_pages$userId)))
ts_out <- clean_pages %>%
left_join(cols, by = "userId") %>%
mutate(time = as.POSIXct(as.numeric(time) %% 86400, origin="1970-01-01", tz="GMT"))
NAs introduced by coercion
pal <- cols$cols %>%
setNames(cols$userId)
shared_ts <- highlight_key(ts_out)
palette(brewer.pal(100, "Set3"))
n too large, allowed maximum for palette Set3 is 12
Returning the palette you asked for with that many colors
gg <- shared_ts %>%
ggplot(aes(time, fill = userId, group = userId)) +
geom_histogram(bins=60) +
scale_color_manual(values = pal) +
labs(x = "Time", y = "Count") +
theme_tufte() +
NULL
filter2 <- bscols(
ggplotly(gg, dynamicTicks = TRUE),
widths = c(12, 12)
)
All elements of `...` must be named.
Did you want `key = c(key)`?Removed 3616 rows containing non-finite values (stat_bin).Too many widths provided to bscols; truncating
bscols(filter2)
signal_with_id <- clean_signals %>%
group_by(userId, word) %>%
mutate(count = sequence(n()))
p <- signals %>%
select(signal, pos) %>%
count(pos) %>%
add_tally(n, name = "total") %>%
mutate(perc = n/total) %>%
ggplot(aes(reorder(pos, n, FUN = min), perc, fill = pos)) +
geom_col() +
scale_y_continuous(labels = scales::percent) +
coord_flip() +
theme_hc() +
theme(legend.position = "none") +
labs(x = "", y = "", title = "Original")
p2 <- signals %>%
filter(signal == 0) %>%
select(signal, pos) %>%
count(pos) %>%
add_tally(n, name = "total") %>%
mutate(perc = n/total) %>%
ggplot(aes(reorder(pos, n, FUN = min), perc, fill = pos)) +
geom_col() +
scale_y_continuous(labels = scales::percent) +
coord_flip() +
theme_hc() +
theme(legend.position = "none") +
labs(x = "", y = "", title = "Unknown")
layout <- "
AAABBB
"
p + p2 + plot_layout(design = layout)

diff_df <- signal_with_id %>%
group_by(userId, word) %>%
mutate(diff = time - lag(time)) %>%
mutate(tran=paste(lag(signal),'->',signal)) %>%
ungroup() %>%
mutate(diff = diff/(60*60*24)) %>%
filter(diff < 15) %>%
filter(!(diff == 0 | is.na(diff))) %>%
select(word, diff, signal, tran)
p <- diff_df %>%
ggplot(aes(x=diff, color=tran)) +
stat_ecdf(geom="point", size=0.5)+
theme_hc() +
labs(x = "Days", y = "", title = "Wait Time CDF") +
scale_fill_discrete("")
p2 <- diff_df %>%
count(tran) %>%
add_tally(n, name = "total") %>%
mutate(perc = n/total) %>%
ggplot(aes(reorder(tran, n, FUN = min), perc, fill = tran)) +
geom_col() +
scale_y_continuous(labels = scales::percent) +
coord_flip() +
theme_hc() +
theme(legend.position = "none") +
labs(x = "", y = "", title = "Transition Type")
p3 <- diff_df %>%
ggplot(aes(x=diff, fill='cdf', show.legend = FALSE)) +
stat_ecdf(aes(ymin=0,ymax=..y..), geom = "ribbon") +
theme_hc() +
theme(legend.position="none") +
labs(x = "Days", y = "", title = "Wait Time CDF by Transition Type")
layout <- "
AAACC
BBBBB
BBBBB
BBBBB
"
p3 + p + p2 + plot_layout(design = layout)

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiMgZ2VuZXJhbCB2aXN1YWxpc2F0aW9uCmxpYnJhcnkoJ2dncGxvdDInKSAjIHZpc3VhbGlzYXRpb24KbGlicmFyeSgnc2NhbGVzJykgIyB2aXN1YWxpc2F0aW9uCmxpYnJhcnkoJ3BhdGNod29yaycpICMgdmlzdWFsaXNhdGlvbgpsaWJyYXJ5KCdSQ29sb3JCcmV3ZXInKSAjIHZpc3VhbGlzYXRpb24KbGlicmFyeSgnY29ycnBsb3QnKSAjIHZpc3VhbGlzYXRpb24KCiMgZ2VuZXJhbCBkYXRhIG1hbmlwdWxhdGlvbgpsaWJyYXJ5KCdkcGx5cicpICMgZGF0YSBtYW5pcHVsYXRpb24KbGlicmFyeSgncmVhZHInKSAjIGlucHV0L291dHB1dApsaWJyYXJ5KCd2cm9vbScpICMgaW5wdXQvb3V0cHV0CmxpYnJhcnkoJ3NraW1yJykgIyBvdmVydmlldwpsaWJyYXJ5KCd0aWJibGUnKSAjIGRhdGEgd3JhbmdsaW5nCmxpYnJhcnkoJ3RpZHlyJykgIyBkYXRhIHdyYW5nbGluZwpsaWJyYXJ5KCdwdXJycicpICMgZGF0YSB3cmFuZ2xpbmcKbGlicmFyeSgnc3RyaW5ncicpICMgc3RyaW5nIG1hbmlwdWxhdGlvbgpsaWJyYXJ5KCdmb3JjYXRzJykgIyBmYWN0b3IgbWFuaXB1bGF0aW9uCgojIHNwZWNpZmljIHZpc3VhbGlzYXRpb24KbGlicmFyeSgnYWxsdXZpYWwnKSAjIHZpc3VhbGlzYXRpb24KbGlicmFyeSgnZ2dyZXBlbCcpICMgdmlzdWFsaXNhdGlvbgpsaWJyYXJ5KCdnZ2ZvcmNlJykgIyB2aXN1YWxpc2F0aW9uCmxpYnJhcnkoJ2dncmlkZ2VzJykgIyB2aXN1YWxpc2F0aW9uCmxpYnJhcnkoJ2dnYW5pbWF0ZScpICMgYW5pbWF0aW9ucwpsaWJyYXJ5KCdHR2FsbHknKSAjIHZpc3VhbGlzYXRpb24KbGlicmFyeSgnZ2d0aGVtZXMnKSAjIHZpc3VhbGlzYXRpb24KbGlicmFyeSgnd2VzYW5kZXJzb24nKSAjIHZpc3VhbGlzYXRpb24KbGlicmFyeSgna2FibGVFeHRyYScpICMgZGlzcGxheQoKIyBEYXRlICsgZm9yZWNhc3QKbGlicmFyeSgnbHVicmlkYXRlJykgIyBkYXRlIGFuZCB0aW1lCmxpYnJhcnkoJ2ZvcmVjYXN0JykgIyB0aW1lIHNlcmllcyBhbmFseXNpcwpsaWJyYXJ5KCdwcm9waGV0JykgIyB0aW1lIHNlcmllcyBhbmFseXNpcwpsaWJyYXJ5KCd0aW1ldGsnKSAjIHRpbWUgc2VyaWVzIGFuYWx5c2lzCgojIEludGVyYWN0aXZpdHkKbGlicmFyeSgnY3Jvc3N0YWxrJykKbGlicmFyeSgncGxvdGx5JykKCiMgcGFyYWxsZWwKbGlicmFyeSgnZm9yZWFjaCcpCmxpYnJhcnkoJ2RvUGFyYWxsZWwnKQoKZ2V0X2JpbkNJIDwtIGZ1bmN0aW9uKHgsbikgYXMubGlzdChzZXROYW1lcyhiaW5vbS50ZXN0KHgsbikkY29uZi5pbnQsIGMoImx3ciIsICJ1cHIiKSkpCmBgYAoKYGBge3J9CgpsaWJyYXJ5KG1nY3YpCmNsZWFuX3BhZ2VzIDwtIHZyb29tKHN0cl9jKCdjbGVhbl9wYWdlcy5jc3YnKSwgZGVsaW0gPSAiLCIsIGNvbF90eXBlcyA9IGNvbHMoKSkKc2Vzc2lvbl9pbmZvIDwtIHZyb29tKHN0cl9jKCdzZXNzLmNzdicpLCBkZWxpbSA9ICIsIiwgY29sX3R5cGVzID0gY29scygpKQpzaWduYWxzIDwtIHZyb29tKHN0cl9jKCdzaWduYWxzLmNzdicpLCBkZWxpbSA9ICIsIiwgY29sX3R5cGVzID0gY29scygpKQpjbGVhbl9zaWduYWxzIDwtIHZyb29tKHN0cl9jKCdjc2lnbmFscy5jc3YnKSwgZGVsaW0gPSAiLCIsIGNvbF90eXBlcyA9IGNvbHMoKSkKYGBgCgpgYGB7cn0Kc2lnbmFsX3Nlc3Npb24gPC0gc2lnbmFscyAlPiUKICBncm91cF9ieSh1c2VySWQsIHNlc3Npb24pICU+JQogIHN1bW1hcmlzZV9hdCh2YXJzKHNpZ25hbCksIGZ1bnMobWVhbiguLCBuYS5ybT1UUlVFKSkpCmBgYAoKYGBge3J9CnNldC5zZWVkKDQzMjEpCgpjbGVhbl9wYWdlcyA8LSBjbGVhbl9wYWdlcyAlPiUKICBzZWxlY3QodXNlcklkLCB0aW1lKQoKY29scyA8LSBjbGVhbl9wYWdlcyAlPiUgCiAgZGlzdGluY3QodXNlcklkKSAlPiUgCiAgbXV0YXRlKGNvbHMgPSByZXBfbGVuKGJyZXdlci5wYWwoNywgIlNldDIiKSwgbGVuZ3RoLm91dCA9IG5fZGlzdGluY3QoY2xlYW5fcGFnZXMkdXNlcklkKSkpCgp0c19vdXQgPC0gY2xlYW5fcGFnZXMgJT4lIAogIGxlZnRfam9pbihjb2xzLCBieSA9ICJ1c2VySWQiKSAlPiUKICBtdXRhdGUodGltZSA9IGFzLlBPU0lYY3QoYXMubnVtZXJpYyh0aW1lKSAlJSA4NjQwMCwgb3JpZ2luPSIxOTcwLTAxLTAxIiwgdHo9IkdNVCIpKSAKCnBhbCA8LSBjb2xzJGNvbHMgJT4lCiAgIHNldE5hbWVzKGNvbHMkdXNlcklkKQoKc2hhcmVkX3RzIDwtIGhpZ2hsaWdodF9rZXkodHNfb3V0KQoKcGFsZXR0ZShicmV3ZXIucGFsKDEwMCwgIlNldDMiKSkKCmdnIDwtIHNoYXJlZF90cyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHRpbWUsIGZpbGwgPSB1c2VySWQsIGdyb3VwID0gdXNlcklkKSkgKwogIGdlb21faGlzdG9ncmFtKGJpbnM9NjApICsKICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzID0gcGFsKSArCiAgbGFicyh4ID0gIlRpbWUiLCB5ID0gIkNvdW50IikgKwogIHRoZW1lX3R1ZnRlKCkgKyAKICBOVUxMCgpmaWx0ZXIyIDwtIGJzY29scygKICBnZ3Bsb3RseShnZywgZHluYW1pY1RpY2tzID0gVFJVRSksCiAgd2lkdGhzID0gYygxMiwgMTIpCikKCmJzY29scyhmaWx0ZXIyKQpgYGAKCmBgYHtyfQoKc2lnbmFsX3dpdGhfaWQgPC0gY2xlYW5fc2lnbmFscyAlPiUgCiAgZ3JvdXBfYnkodXNlcklkLCB3b3JkKSAlPiUgCiAgbXV0YXRlKGNvdW50ID0gc2VxdWVuY2UobigpKSkKCnAgPC0gc2lnbmFscyAlPiUgCiAgc2VsZWN0KHNpZ25hbCwgcG9zKSAlPiUgCiAgY291bnQocG9zKSAlPiUgCiAgYWRkX3RhbGx5KG4sIG5hbWUgPSAidG90YWwiKSAlPiUgCiAgbXV0YXRlKHBlcmMgPSBuL3RvdGFsKSAlPiUgCiAgZ2dwbG90KGFlcyhyZW9yZGVyKHBvcywgbiwgRlVOID0gbWluKSwgcGVyYywgZmlsbCA9IHBvcykpICsKICBnZW9tX2NvbCgpICsKICBzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzID0gc2NhbGVzOjpwZXJjZW50KSArCiAgY29vcmRfZmxpcCgpICsKICB0aGVtZV9oYygpICsKICB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAibm9uZSIpICsKICBsYWJzKHggPSAiIiwgeSA9ICIiLCB0aXRsZSA9ICJPcmlnaW5hbCIpCgpwMiA8LSBzaWduYWxzICU+JSAKICBmaWx0ZXIoc2lnbmFsID09IDApICU+JSAKICBzZWxlY3Qoc2lnbmFsLCBwb3MpICU+JSAKICBjb3VudChwb3MpICU+JSAKICBhZGRfdGFsbHkobiwgbmFtZSA9ICJ0b3RhbCIpICU+JSAKICBtdXRhdGUocGVyYyA9IG4vdG90YWwpICU+JSAKICBnZ3Bsb3QoYWVzKHJlb3JkZXIocG9zLCBuLCBGVU4gPSBtaW4pLCBwZXJjLCBmaWxsID0gcG9zKSkgKwogIGdlb21fY29sKCkgKwogIHNjYWxlX3lfY29udGludW91cyhsYWJlbHMgPSBzY2FsZXM6OnBlcmNlbnQpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lX2hjKCkgKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIikgKwogIGxhYnMoeCA9ICIiLCB5ID0gIiIsIHRpdGxlID0gIlVua25vd24iKQoKbGF5b3V0IDwtICIKQUFBQkJCCiIKCnAgKyBwMiAgKyBwbG90X2xheW91dChkZXNpZ24gPSBsYXlvdXQpCmBgYAoKCmBgYHtyfQpkaWZmX2RmIDwtIHNpZ25hbF93aXRoX2lkICU+JQogICAgZ3JvdXBfYnkodXNlcklkLCB3b3JkKSAlPiUKICAgIG11dGF0ZShkaWZmID0gdGltZSAtIGxhZyh0aW1lKSkgJT4lCiAgICBtdXRhdGUodHJhbj1wYXN0ZShsYWcoc2lnbmFsKSwnLT4nLHNpZ25hbCkpICU+JQogICAgdW5ncm91cCgpICU+JQogICAgbXV0YXRlKGRpZmYgPSBkaWZmLyg2MCo2MCoyNCkpICU+JQogICAgZmlsdGVyKGRpZmYgPCAxNSkgJT4lCiAgICBmaWx0ZXIoIShkaWZmID09IDAgfCBpcy5uYShkaWZmKSkpICU+JQogICAgc2VsZWN0KHdvcmQsIGRpZmYsIHNpZ25hbCwgdHJhbikKCnAgPC0gZGlmZl9kZiAlPiUgCiAgZ2dwbG90KGFlcyh4PWRpZmYsIGNvbG9yPXRyYW4pKSArCiAgc3RhdF9lY2RmKGdlb209InBvaW50Iiwgc2l6ZT0wLjUpKwogIHRoZW1lX2hjKCkgKwogIGxhYnMoeCA9ICJEYXlzIiwgeSA9ICIiLCB0aXRsZSA9ICJXYWl0IFRpbWUgQ0RGIikgKyAKICBzY2FsZV9maWxsX2Rpc2NyZXRlKCIiKQoKcDIgPC0gZGlmZl9kZiAlPiUKICBjb3VudCh0cmFuKSAlPiUgCiAgYWRkX3RhbGx5KG4sIG5hbWUgPSAidG90YWwiKSAlPiUgCiAgbXV0YXRlKHBlcmMgPSBuL3RvdGFsKSAlPiUgCiAgZ2dwbG90KGFlcyhyZW9yZGVyKHRyYW4sIG4sIEZVTiA9IG1pbiksIHBlcmMsIGZpbGwgPSB0cmFuKSkgKwogIGdlb21fY29sKCkgKwogIHNjYWxlX3lfY29udGludW91cyhsYWJlbHMgPSBzY2FsZXM6OnBlcmNlbnQpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lX2hjKCkgKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIikgKwogIGxhYnMoeCA9ICIiLCB5ID0gIiIsIHRpdGxlID0gIlRyYW5zaXRpb24gVHlwZSIpCiAKCnAzIDwtIGRpZmZfZGYgJT4lIAogIGdncGxvdChhZXMoeD1kaWZmLCBmaWxsPSdjZGYnLCBzaG93LmxlZ2VuZCA9IEZBTFNFKSkgKwogIHN0YXRfZWNkZihhZXMoeW1pbj0wLHltYXg9Li55Li4pLCBnZW9tID0gInJpYmJvbiIpICsKICB0aGVtZV9oYygpICsKICB0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiKSArCiAgbGFicyh4ID0gIkRheXMiLCB5ID0gIiIsIHRpdGxlID0gIldhaXQgVGltZSBDREYgYnkgVHJhbnNpdGlvbiBUeXBlIikgCgpsYXlvdXQgPC0gIgpBQUFDQwpCQkJCQgpCQkJCQgpCQkJCQgoiCgpwMyArIHAgKyBwMiArIHBsb3RfbGF5b3V0KGRlc2lnbiA9IGxheW91dCkKYGBgCgoK